rm(list=ls())

####################################################################################################
#Load libraries                                                                                    #  
#Users will need to install packages using install.packages() if using packages for the first time #
####################################################################################################
library(MASS)
library(RColorBrewer)

############
#Functions #
############
ff <- function(x) formatC(x, digits=2, format="f")
ff2 <- function(x) formatC(x, digits=0, format="f", big.mark = ",")
ff3 <- function(x) ff2(round(x,-2))
ff4 <- function(x) formatC(x, digits=1, format="f")

last <- function(x) x[length(x)]

date.fxn <- function(start, end) {
  start.date <- as.Date(start, tz="UTC")
  end.date <- strptime(end, format="%Y-%m-%d", tz="UTC")
  exposure.duration <- as.numeric(difftime(as.POSIXct(end.date), as.POSIXct(start.date, tz="UTC"), units="days")) + 1 
  return(exposure.duration)
}

################################################################################
#US case fatality rates and case distribution, both age group-specific         #
#Source: https://www.cdc.gov/mmwr/volumes/69/wr/mm6912e2.htm?s_cid=mm6912e2_w  #
################################################################################
case.data.us <- read.csv("covid19_case_distriution_case_fatality_us.csv")
case.distribution.usa <- prop.table(case.data.us$cases)
case.fatality.pt.est.usa <- case.data.us$case.fatality.pt.est/100
case.fatality.lower.usa <- case.data.us$case.fatality.lower/100
case.fatality.upper.usa <- case.data.us$case.fatality.upper/100

###############################################################
#Read UN population data for US                               #
#Reported in 1000s                                            #
#Source: https://population.un.org/wpp/Download/Standard/CSV/ #
###############################################################
world.population <- read.csv("WPP2019_PopulationByAgeSex_Medium.csv")
popl.usa <- subset(world.population, Time==2020 & Location=="United States of America")
age.categories.usa <- c(-1, case.data.us$age.end[-length(case.data.us$age.end)], 120)
popl.usa$AgeCat <- cut(popl.usa$AgeGrpStart, age.categories.usa)
popl.usa.2020 <- by(1000*popl.usa$PopTotal, popl.usa$AgeCat, function(x) sum(x))

popl.usa.2018 <- subset(world.population, Time==2018 & Location=="United States of America")
age.categories.usa <- c(-1, case.data.us$age.end[-length(case.data.us$age.end)], 120)
popl.usa.2018$AgeCat <- cut(popl.usa.2018$AgeGrpStart, age.categories.usa)
popl.usa.stand.2018 <- prop.table(by(1000*popl.usa.2018$PopTotal, popl.usa.2018$AgeCat, function(x) sum(x)))

###############################################################
#US ASDR by cause of death                                    #
#Source: https://www.cdc.gov/nchs/data/databriefs/db355-h.pdf #
###############################################################
cod.usa <- c(163.6, 149.1, 48.0, 39.7, 37.1, 30.5, 21.4, 14.9, 12.9, 14.2)
names(cod.usa) <- c("Diseases of heart", "Malignant neoplasms", "Accidents  (unintentional injuries)", "Chronic lower respiratory diseases", "Cerebrovascular diseases", "Alzheimer disease", "Diabetes mellitus", "Influenza and pneumonia", "Nephritis, nephrotic syndrome, and nephrosis", "Intentional self-harm (suicide)")

###########################################################################
#COVID-19 deaths by day                                                   #
#Source: NYT Online Repository (https://github.com/nytimes/covid-19-data) #
###########################################################################
us.states <- read.csv("us-states.csv")
remove <- c("Guam", "Northern Mariana Islands", "Puerto Rico", "Virgin Islands") #Note: District of Columbia remains in the dataset, along with all 50 US states
us.states <- us.states[-which(us.states$state %in% remove)]
us.deaths.all <- as.numeric(by(us.states$deaths, us.states$date, sum))
us.days <- 1:length(us.deaths.all)
us.deaths <- data.frame(time=1:length(us.deaths.all), deaths=us.deaths.all)
us.deaths2 <- subset(us.deaths, deaths>=1)
us.deaths2$time <- 1:nrow(us.deaths2)
us.deaths2$daily.deaths <- c(diff(us.deaths2$deaths),NA)
us.deaths2$cum.daily.deaths <- cumsum(us.deaths2$daily.deaths)
us.deaths2$log.cum.daily.deaths <- log(us.deaths2$cum.daily.deaths)

############################
#Fit logistic growth curve #
############################
m1<-nls(N_deaths~SSlogis(time,alpha,xmid,scale),data=data.frame(N_deaths=us.deaths2$log.cum.daily.deaths,time=us.deaths2$time),
        start=list(alpha=4,xmid=5,scale=1))

alpha.pt.est = summary(m1)$coef["alpha","Estimate"]
alpha.lower = summary(m1)$coef["alpha","Estimate"]-qnorm(0.975)*summary(m1)$coef["alpha","Std. Error"]
alpha.upper = summary(m1)$coef["alpha","Estimate"]+qnorm(0.975)*summary(m1)$coef["alpha","Std. Error"]
scale.pt.est = summary(m1)$coef["scale","Estimate"]
scale.lower = summary(m1)$coef["scale","Estimate"]-qnorm(0.975)*summary(m1)$coef["scale","Std. Error"]
scale.upper = summary(m1)$coef["scale","Estimate"]+qnorm(0.975)*summary(m1)$coef["scale","Std. Error"]
xmid.pt.est = summary(m1)$coef["xmid","Estimate"]
xmid.lower = summary(m1)$coef["xmid","Estimate"]-qnorm(0.975)*summary(m1)$coef["xmid","Std. Error"]
xmid.upper = summary(m1)$coef["xmid","Estimate"]+qnorm(0.975)*summary(m1)$coef["xmid","Std. Error"]
deaths.pt.est <- exp(alpha.pt.est/(1+exp((xmid.pt.est-c(1:365))/scale.pt.est)))
deaths.lower <- exp(alpha.lower/(1+exp((xmid.lower-c(1:365))/scale.lower)))
deaths.upper <- exp(alpha.upper/(1+exp((xmid.upper-c(1:365))/scale.upper)))
deaths.by.age.pt.est <- prop.table(case.data.us$cases*case.data.us$case.fatality.pt.est/100)*deaths.pt.est[length(deaths.pt.est)]
deaths.by.age.lower <- prop.table(case.data.us$cases*case.data.us$case.fatality.pt.est/100)*deaths.lower[length(deaths.lower)]
deaths.by.age.upper <- prop.table(case.data.us$cases*case.data.us$case.fatality.pt.est/100)*deaths.upper[length(deaths.upper)]
mx.age.pt.est <- 100000 * deaths.by.age.pt.est / popl.usa.2020
mx.age.lower <- 100000 * deaths.by.age.lower / popl.usa.2020
mx.age.upper <- 100000 * deaths.by.age.upper / popl.usa.2020
asdr.pt.est <- sum(mx.age.pt.est * popl.usa.stand.2018)
asdr.lower <- sum(mx.age.lower * popl.usa.stand.2018)
asdr.upper <- sum(mx.age.upper * popl.usa.stand.2018)
cod.usa.pt.est <- c(cod.usa, asdr.pt.est); names(cod.usa.pt.est) <- c(names(cod.usa), "covid")
cod.usa.lower <- c(cod.usa, asdr.lower); names(cod.usa.lower) <- c(names(cod.usa), "covid")
cod.usa.upper <- c(cod.usa, asdr.upper); names(cod.usa.upper) <- c(names(cod.usa), "covid")
start.date <- "2020-02-29" #first reported COVID-19 death in US

######################################################
#Graph estimated number of deaths over 1-year period #
######################################################
ymax <- max(deaths.pt.est)
cairo_pdf("total_deaths_usa.pdf", width=5.5, height=5.5)
par (mfrow=c(1,1), bg="white", mar=c(2,4,2,2), oma=c(2,2,2,2), font=1, cex=0.9, cex.main=0.9)
plot(1:365, deaths.pt.est, xlim=c(1,365), ylim=c(0,ymax), type="l", bty="l", xlab=NA, ylab=NA, xaxt="n", yaxt="n")
axis(1, at=seq(50,365,25))
axis(2, at=seq(0,1.2*10^5,10^4), paste(formatC(seq(0,1.2*10^5,10^4), digits=0, format="f", big.mark = ",")),las=1)
points(us.deaths2$time, us.deaths2$deaths, col="grey10", pch=1, cex=1)
grid(lty=3, col="grey")
lines(1:365, deaths.pt.est, lwd=1)
legend("bottomright", pch=c(1,NA), lty=c(NA,1), bty="n", paste(c("Observed", "Estimated")))
text(250, 1.1*10^5, paste("Total Number of Deaths: ", ff2(round(deaths.pt.est[length(deaths.pt.est)],-2)), sep=""), cex=1)
mtext("Total Number of Deaths", side=2, line=0.5, outer=TRUE, at=1/2, cex=1)
mtext("Days Since First Reported Death", side=1, line=0.5, outer=TRUE, at=1/2, cex=1)
dev.off()

###################################################################################
#Graph deaths per day, total deaths by age group, and mortality rate by age group #
###################################################################################
blue <- brewer.pal(3,"Set1")[2]; red <- brewer.pal(5,"Greys")[4]
cairo_pdf("death_results_usa.pdf", height=11, width=8.5)
par (mfrow=c(3,1), mgp=c(2.75,1,0)*0.55, mar=c(2.5,5.0,2.5,2.5), omi=c(0.2,0.5,0,0), tcl=-0.25, bg="white", cex=1.0, cex.main=0.9)
days.select <- 1:150
barplot(diff(deaths.pt.est), ylim=c(0, 4200), xlim=c(0,150), axes=F, col=red, border=red, xaxt="n", yaxt="n", xlab="Number of Days Since First Reported Death", ylab=NA)
title(ylab="Number", line=4)
xpos <- barplot(diff(deaths.pt.est), plot=FALSE)
axis(1)#, at=xpos[seq(1,365,30)], labels=seq(1,365,30), line=-0)
axis(2, at=seq(0,4000,500), las=1, paste(ff2(seq(0,4000,500))))
abline(h=seq(0,4000,500), col="grey80", lty=3)
barplot(diff(deaths.pt.est), axes=F, col=red, border=red, xaxt="n", yaxt="n", add=TRUE)
text(xpos[rev(order(diff(deaths.pt.est)))[1]], rev(sort(diff(deaths.pt.est)))[1], paste("Peak Deaths: ", ff2(round(rev(sort(diff(deaths.pt.est)))[1],-2)), "\nDay ", rev(order(diff(deaths.pt.est)))[1], " (", "16 April 2020", ")", sep=""), pos=3, cex=0.75)
title("A. Daily Deaths", font.main=1, cex=1)

ymin <- 0; ymax <- max(deaths.by.age.pt.est); y.additional <- 5000
barplot(deaths.by.age.pt.est, col=red, border=red, xaxt="n", yaxt="n", ylim=c(ymin, ymax+y.additional), xlab="Age Group (Years)", ylab=NA)
title(ylab="Number", line=4)
xpos <- barplot(deaths.by.age.pt.est, plot=FALSE)
axis(1, at=xpos, tick=FALSE, line=-0.5, labels=c("0-19", "20-44", "45-54", "55-64", "65-74", "75-84", "\u226585"))
axis(2, at=seq(0, 50000, 5000), paste(ff2(seq(0,50000,5000))), las=1)
abline(h=seq(0,50000,5000), col="grey80", lty=3)
barplot(deaths.by.age.pt.est, col=red, border=red, xaxt="n", yaxt="n", ylim=c(ymin, ymax+y.additional), add=TRUE)
text(xpos, deaths.by.age.pt.est, formatC(round(deaths.by.age.pt.est, -2), format="f", big.mark=",", digits=0), pos=3, cex=0.75)
#text(mean(xpos[1]), 45000, paste("Total Deaths:", ff2(round(sum(deaths.by.age.pt.est),-2)), sep=""), cex=1.0, pos=4)
title("B. Total Deaths by Age Group", font.main=1, line=-0.25)

ymin <- 0; ymax <- max(mx.age.pt.est); y.additional <- 60
barplot(mx.age.pt.est, col=red, border=red, xaxt="n", yaxt="n", ylim=c(ymin, ymax+y.additional), xlab="Age Group (Years)", ylab=NA)
title(ylab="Deaths per 100,000 Person-Years", line=2.5)
xpos <- barplot(mx.age.pt.est, plot=FALSE)
axis(1, at=xpos, tick=FALSE, line=-0.5, labels=c("0-19", "20-44", "45-54", "55-64", "65-74", "75-84", "\u226585"))
axis(2, at=seq(0, 700, 100), paste(ff2(seq(0,700,100))), las=1) 
abline(h=seq(0,700,100), col="grey80", lty=3)
barplot(mx.age.pt.est, col=red, border=red, xaxt="n", yaxt="n", ylim=c(ymin, ymax+y.additional), add=TRUE)
text(xpos, mx.age.pt.est, formatC(round(mx.age.pt.est,1), format="f", big.mark=",", digits=1), pos=3, cex=0.75)
text(mean(xpos)-2, 550, paste("ASDR: ", round(asdr.pt.est,1), " Deaths per 100,000 Person-Years", sep=""), cex=0.75, pos=4)
title("C. Mortality Rate by Age Group", font.main=1, pos=3, line=-0.25, cex=1.0)
dev.off()

##############################################################
#Create table of mortality results with confidence intervals #
##############################################################
usa.death.results <- 
  t(data.frame(total.deaths=ff3(c(last(deaths.pt.est), last(deaths.lower), last(deaths.upper))),
           peak.level=ff3(c(round(max(diff(deaths.pt.est))), round(max(diff(deaths.lower))), round(max(diff(deaths.upper))))),
           peak.day=c(rev(order(diff(deaths.pt.est)))[1], rev(order(diff(deaths.lower)))[1], rev(order(diff(deaths.upper)))[1]),
           peak.date=c(as.Date(as.Date(start.date)+rev(order(diff(deaths.pt.est)))[1]), as.Date(as.Date(start.date)+rev(order(diff(deaths.lower)))[1]), as.Date(as.Date(start.date)+rev(order(diff(deaths.upper)))[1])),
           asdr=ff4(c(asdr.pt.est, asdr.lower, asdr.upper)),
           asdr.rank=c(which(names(rev(sort(cod.usa.pt.est)))=="covid"), ifelse(which(names(rev(sort(cod.usa.lower)))=="covid")>10, ">10", which(names(rev(sort(cod.usa.lower)))=="covid")), which(names(rev(sort(cod.usa.upper)))=="covid"))))
names(usa.death.results) <- c("pt.est", "lower", "upper")
write.csv(usa.death.results, file="usa_death_results.csv")